1. Load and aggregate data
# Load required packages
# NOTE: MASS is attached after dplyr, so MASS::select() masks dplyr::select();
# call dplyr::select() explicitly if column selection is needed downstream.
library(readxl)
library(tidyr)
library(dplyr)
library(ggplot2)
library(smacof)
library(plotly)
library(MASS)
library(fitdistrplus)
library(extremevalues)
library(here)
library(patchwork)
library(scatterplot3d)
library(rgl)
library(grid)
library(gridExtra)
library(viridis)
library(shapes)
library(matrixStats)
library(shiny)
library(shinydashboard)
library(andrews)
library(pheatmap)
library(reshape2)
# Locate the project root and read the raw ratings file (tab-separated values)
mds_dir <- here::here()
data_path <- file.path(mds_dir, "MDS_ASC_data")
df_full <- read.csv(file = data_path, sep = "\t", header = TRUE, quote = "\"")
# Basic demographic stats before filtering.
# Gender counts use na.rm = TRUE so a single missing value cannot turn a
# whole count into NA (the recorded output below sums exactly to n, so this
# does not change the published numbers).
df_full %>%
  summarise(
    Number_of_Participants = n(),
    Mean_Age = mean(Age, na.rm = TRUE),
    SD_Age = sd(Age, na.rm = TRUE),
    Male_Participants = sum(Gender == "Man", na.rm = TRUE),
    Female_Participants = sum(Gender == "Woman", na.rm = TRUE),
    Other_Participants = sum(Gender == "Other", na.rm = TRUE)
  ) %>%
  print()
## Number_of_Participants Mean_Age SD_Age Male_Participants
## 1 785 30.10828 8.328384 412
## Female_Participants Other_Participants
## 1 359 14
## Perform exclusions according to the preregistered criteria
# (1) Average response time per dissimilarity rating below 2 seconds
# hist(df_full$time_per_subs, breaks = 100)
# BUG FIX: the message said "above 2s" while the expression counts ratings
# BELOW the 2 s threshold (the exclusion criterion).
cat("Response time below 2s:", sum(df_full$time_per_subs < 2, na.rm = TRUE), "\n")
## Response time below 2s: 0
# Keep participants at or above 2 s. The criterion excludes "below 2 s";
# the original filter used > 2, which would also drop exact ties at 2 s
# that the count above does not flag as excluded.
df_filtered <- filter(df_full, time_per_subs >= 2)
# (2) Inaccurate responses to the two control questions
# Test 1
# hist(df_filtered$test1, breaks = 100, xlim = c(0,100), ylim = c(0,50))
cat("Incorrect response to test1:", sum(df_filtered$test1 < 99, na.rm = TRUE), "\n")
## Incorrect response to test1: 10
df_filtered <- filter(df_filtered, test1 >= 99)
# Test 2
# hist(df_filtered$test2, breaks = 100, xlim = c(0,7), ylim = c(0,100))
cat("Incorrect response to test2:", sum(df_filtered$test2 > 1, na.rm = TRUE), "\n")
## Incorrect response to test2: 36
df_filtered <- filter(df_filtered, test2 <= 1) # liberal (2nd step)
# Recode ID variable so IDs run 1..n after exclusions
# (later loops over df_1, df_2, ... rely on this numbering)
df_filtered$ID <- seq_len(nrow(df_filtered))
# Basic demographic stats after filtering.
# na.rm = TRUE added to the gender counts (see pre-filter block); the
# trailing comma after SD_Age was removed.
df_filtered %>%
  summarise(
    Number_of_Participants = n(),
    Male = sum(Gender == "Man", na.rm = TRUE),
    Female = sum(Gender == "Woman", na.rm = TRUE),
    Other = sum(Gender == "Other", na.rm = TRUE),
    Mean_Age = mean(Age, na.rm = TRUE),
    SD_Age = sd(Age, na.rm = TRUE)
  ) %>%
  print()
## Number_of_Participants Male Female Other Mean_Age SD_Age
## 1 739 385 340 14 29.82409 8.087188
# Strip the "X" prefix that R prepends to column names starting with a digit (2CB)
colnames(df_filtered) <- gsub("X2CB", "2CB", colnames(df_filtered))
# Substance/state codes used throughout the analysis
# ("Baseline" = ordinary waking state)
substance_codes <- c("Baseline", "Alc", "MJ", "MDMA", "Amf", "LSD", "Psy", "Mef", "Coc",
                     "Alp", "Ket", "DMT", "N2O", "DXM", "Cod", "Tra", "Her", "Salv",
                     "GHB", "Dat", "Ben", "2CB", "Diph")
# Build every unordered pair of distinct codes; keeping only Var1 < Var2
# leaves exactly one orientation per pair
combinations <- expand.grid(Var1 = substance_codes, Var2 = substance_codes, stringsAsFactors = FALSE)
combinations <- subset(combinations, Var1 != Var2)
combinations <- subset(combinations, Var1 < Var2)
# "Var1_Var2" labels for the unique comparisons
column_names <- apply(combinations, 1, function(pair) paste(pair, collapse = "_"))
# Build one symmetric dissimilarity matrix per subject.
# The wide-format data stores each rated pair in a single column named by the
# concatenated codes (e.g. "AlcMJ"); only one orientation of a pair carries a
# rating, so both orderings are checked. Unrated pairs and the diagonal stay NA.
for (i in as.numeric(df_filtered$ID)) {
  subject_id <- i
  # Extract the single wide-format row for the current subject
  subject_data <- df_filtered[df_filtered$ID == subject_id, ]
  # Empty square frame with substance codes as both row and column names
  subject_df <- data.frame(matrix(NA, nrow = length(substance_codes), ncol = length(substance_codes),dimnames = list(substance_codes, substance_codes)))
  colnames(subject_df) <- substance_codes
  rownames(subject_df) <- substance_codes
  # Populate from whichever orientation of each pair holds a rating
  for (code1 in substance_codes) {
    for (code2 in substance_codes) {
      col_name1 <- paste0(code1, code2)
      col_name2 <- paste0(code2, code1)
      if (col_name1 != col_name2) {
        # ROBUSTNESS FIX: subject_data[["missing_column"]] returns NULL and
        # the original if (!is.na(NULL)) errors with "argument is of length
        # zero"; guard on column existence before reading the value.
        value <- NA
        if (col_name1 %in% names(subject_data) && !is.na(subject_data[[col_name1]])) {
          value <- subject_data[[col_name1]]
        } else if (col_name2 %in% names(subject_data) && !is.na(subject_data[[col_name2]])) {
          value <- subject_data[[col_name2]]
        }
        if (!is.na(value)) {
          # Mirror the rating into both triangles of the matrix
          subject_df[code1, code2] <- value
          subject_df[code2, code1] <- value
        }
      }
    }
  }
  # Expose the matrix as df_<ID> in the global environment;
  # downstream code retrieves these by name with get()
  assign(paste0("df_", subject_id), subject_df, envir = .GlobalEnv)
}
# Drop the loop-scoped leftovers from the matrix-building step
rm(subject_df, subject_data)
# Collect every per-subject matrix (df_1, df_2, ...) into one list;
# a subject whose data frame is missing contributes NULL
list_df <- lapply(as.numeric(df_filtered$ID), function(subject_id) {
  frame_name <- paste0("df_", subject_id)
  if (!exists(frame_name, envir = .GlobalEnv)) {
    return(NULL)
  }
  get(frame_name)
})
## Derive averaged dissimilarity ratings and number of comparisons
# comparisons_n[j, k] counts how many subjects rated pair (j, k);
# dt_23 accumulates the summed ratings and is then divided by those counts.
# Cells never rated by anyone end up 0/0 = NaN (includes the diagonal).
comparisons_n <- matrix(0, nrow = ncol(list_df[[1]]), ncol = ncol(list_df[[1]]))
dt_23 <- comparisons_n
# Accumulate sums and counts over subjects; NA cells contribute nothing
for (i in seq_along(list_df)) {
  df <- list_df[[i]]
  rated <- !is.na(df)
  comparisons_n[rated] <- comparisons_n[rated] + 1
  dt_23[rated] <- dt_23[rated] + as.numeric(df[rated])
}
dt_23 <- dt_23 / comparisons_n
# Label rows and columns with the substance codes
colnames(dt_23) <- substance_codes
rownames(dt_23) <- substance_codes
colnames(comparisons_n) <- substance_codes
rownames(comparisons_n) <- substance_codes
# Save the final matrix
dt <- dt_23
rm(df)
# Remove states without the expected number of obtained ratings
# (Diphenidine, Datura, Benzydamine)
dt <- dt[!(rownames(dt) %in% c("Diph", "Dat", "Ben")), ]
dt <- dt[, !(colnames(dt) %in% c("Diph", "Dat", "Ben"))]
# Reduce each individual DF to only the substances the subject actually rated.
# A substance the subject never compared shows up as an all-NA row/column.
for (i in seq_along(list_df)) {
  # Access the subject's data frame by its dynamic name
  df_name <- paste0("df_", i)
  df_i <- get(df_name)
  # Indices of all-NA rows / columns
  rows_with_all_nas <- which(apply(is.na(df_i), 1, all))
  cols_with_all_nas <- which(apply(is.na(df_i), 2, all))
  # BUG FIX: df_i[-integer(0), ] selects ZERO rows, so the original wiped out
  # the entire frame for any subject with no all-NA rows/columns.
  # Only drop when there is actually something to drop.
  if (length(rows_with_all_nas) > 0) {
    df_i <- df_i[-rows_with_all_nas, , drop = FALSE]
  }
  if (length(cols_with_all_nas) > 0) {
    df_i <- df_i[, -cols_with_all_nas, drop = FALSE]
  }
  # Update the data frame in the global environment
  assign(df_name, df_i)
}
# Replace NAs with 0 in the reduced individual DFs.
# seq_along() instead of 1:length(): the original 1:length(list_df) would
# iterate c(1, 0) and error on an empty list.
for (i in seq_along(list_df)) {
  # Access the subject's data frame by its dynamic name
  df_name <- paste0("df_", i)
  df_i <- get(df_name)
  # Remaining NAs are unrated pairs among the subject's present substances
  df_i[is.na(df_i)] <- 0
  # Update the data frame in the global environment
  assign(df_name, df_i)
}
# Extract, for every ordered substance pair, the vector of all subjects'
# ratings, and stack them into results_df (one row per pair, one column per
# subject). Subjects who did not rate a pair contribute NA.
# Rows are collected in a list and bound once at the end: rbind() inside the
# loop is quadratic, and rbind-ing onto the original zero-row one-column
# frame fails on the column-count mismatch with the 740-column rows.
comparison_rows <- list()
for (substance1 in substance_codes) {
  for (substance2 in substance_codes) {
    if (substance1 != substance2) { # Avoid self-comparisons
      # Label for this ordered comparison
      col_name <- paste0(substance1, "_vs_", substance2)
      # One value per subject, stored as character to match the original
      # accumulation (c(character(0), numeric) coerced values to character)
      comparison_values <- vapply(seq_along(list_df), function(i) {
        df_i <- get(paste0("df_", i))
        if (substance1 %in% rownames(df_i) && substance2 %in% colnames(df_i)) {
          as.character(df_i[substance1, substance2])
        } else {
          # NA when the subject never saw this pair of substances
          NA_character_
        }
      }, character(1))
      comparison_rows[[length(comparison_rows) + 1]] <-
        data.frame(Comparison = col_name, t(comparison_values))
    }
  }
}
# Bind all comparison rows in a single pass
results_df <- do.call(rbind, comparison_rows)
# Reduce symmetric rows (e.g. Alc_vs_Cod and Cod_vs_Alc describe the same pair):
# keep the first orientation encountered, drop its mirror.
# FIX: build a logical keep-mask and subset once, instead of rbind-ing onto a
# zero-row one-column frame (which fails on the column-count mismatch) row by row.
keep_row <- logical(nrow(results_df))
# Comparisons already kept, used to detect mirrored duplicates
unique_comparisons <- character(0)
for (i in seq_len(nrow(results_df))) {
  current_comparison <- results_df$Comparison[i]
  # Mirror "A_vs_B" into "B_vs_A"
  symmetric_comparison <- paste(rev(unlist(strsplit(current_comparison, "_vs_"))), collapse = "_vs_")
  if (!(symmetric_comparison %in% unique_comparisons)) {
    # First time this unordered pair is seen: keep the row
    unique_comparisons <- c(unique_comparisons, current_comparison)
    keep_row[i] <- TRUE
  }
}
results_df_reduced <- results_df[keep_row, ]
# View the reduced results data frame
View(results_df_reduced)
ds <- results_df_reduced
col_num <- ncol(ds) # original number of columns (Comparison + one per subject)
# Convert the subject rating columns from character to numeric
ds[, 2:col_num] <- lapply(ds[, 2:col_num], as.numeric)
# Row-wise mean dissimilarity and spread across subjects
# (NB: the "variance" column actually stores the standard deviation)
ds$mean_dissimilarity <- rowMeans(ds[2:col_num], na.rm = TRUE)
ds$variance <- apply(ds[2:col_num], 1, sd, na.rm = TRUE)
codes <- substance_codes
# Aggregated per-substance summary: mean dissimilarity to all other states
# and mean spread of those ratings.
# (NB: "variance" holds the mean of per-pair standard deviations, not a
# true variance.)
dss <- data.frame(state = codes, mean_dissimilarity = 0, variance = 0)
# Split each Comparison label into its two codes once, so matching is exact.
# FIX: the original grep(code, ds$Comparison) used substring/regex matching,
# which silently over-matches if any code is contained in another.
comparison_parts <- strsplit(ds$Comparison, "_vs_", fixed = TRUE)
for (code in codes) {
  # Rows whose comparison involves this code on either side of the pair
  involves_code <- vapply(comparison_parts, function(p) code %in% p, logical(1))
  matching_rows <- ds[involves_code, ]
  # Mean dissimilarity and mean spread across those comparisons, excluding NA/NaN
  mean_value <- mean(matching_rows$mean_dissimilarity, na.rm = TRUE)
  sd_value <- mean(matching_rows$variance, na.rm = TRUE)
  # Update the aggregated data frame
  dss[dss$state == code, "mean_dissimilarity"] <- mean_value
  dss[dss$state == code, "variance"] <- sd_value
}
# Add dissimilarity-to-baseline variable.
# FIX: select the Baseline comparisons by name rather than assuming they
# occupy the first 22 rows of ds.
dsb <- ds[grepl("^Baseline_vs_", ds$Comparison), ]
dsb$Comparison <- sub("Baseline_vs_", "", dsb$Comparison)
names(dsb)[names(dsb) == "Comparison"] <- "state"
# Mean rated dissimilarity between each state and the waking baseline
dsb$dissimilarity_to_baseline <- rowMeans(dsb[2:col_num], na.rm = TRUE)
new_row <- data.frame(state = "Baseline", dissimilarity_to_baseline = 0)
# FIX: keep only the key and the new variable by NAME; the original
# hard-coded column index 743, which breaks if the subject count changes.
dsb <- dsb[, c("state", "dissimilarity_to_baseline")]
dsb <- rbind(dsb, new_row)
dss <- left_join(dss, dsb, by = "state")
# Add confidence ratings
# NOTE(review): columns 538:560 are assumed to hold the per-substance
# confidence ratings — confirm against the raw column names.
dc <- df_filtered[, 538:560]
# Strip the "Conf..." suffix so each column is named by its substance code
colnames(dc) <- sub("Conf.*$", "", colnames(dc))
# Coerce to a numeric matrix
dc <- sapply(dc, as.numeric)
# Per-substance mean confidence across all subjects
col_means <- colMeans(dc, na.rm = TRUE)
# Standard deviation of confidence ratings (sqrt of the column variances)
col_vars <- sqrt(colVars(dc, na.rm = TRUE))
# Append both summary rows below the subject-level ratings
# (the appended rows are named "col_means"/"col_vars" after the arguments)
dc <- rbind(dc, col_means, col_vars)
2. 2D MDS mappings for individual subjects (with 10 or more compared states)